Make bubble plots

setwd('~/tools/biqr/')

## library(biqr)
source('~/tools/biqr/biqr/R/biqr.R')

seq.fns <- paste0('1134071/', list.files('1134071', pattern=".seq"))
seq.fns.2F2R <- seq.fns[grepl("2F2R", seq.fns)]
seq.fns.2F2R
##  [1] "1134071/Aza2F2R1_M13R_Plate_Plate01_B08.seq"  
##  [2] "1134071/Aza2F2R10_M13R_Plate_Plate01_C09.seq" 
##  [3] "1134071/Aza2F2R11_M13R_Plate_Plate01_D09.seq" 
##  [4] "1134071/Aza2F2R12_M13R_Plate_Plate01_E09.seq" 
##  [5] "1134071/Aza2F2R2_M13R_Plate_Plate01_C08.seq"  
##  [6] "1134071/Aza2F2R3_M13R_Plate_Plate01_D08.seq"  
##  [7] "1134071/Aza2F2R4_M13R_Plate_Plate01_E08.seq"  
##  [8] "1134071/Aza2F2R5_M13R_Plate_Plate01_F08.seq"  
##  [9] "1134071/Aza2F2R6_M13R_Plate_Plate01_G08.seq"  
## [10] "1134071/Aza2F2R7_M13R_Plate_Plate01_H08.seq"  
## [11] "1134071/Aza2F2R8_M13R_Plate_Plate01_A09.seq"  
## [12] "1134071/Aza2F2R9_M13R_Plate_Plate01_B09.seq"  
## [13] "1134071/DKO12F2R1_M13R_Plate_Plate01_B11.seq" 
## [14] "1134071/DKO12F2R10_M13R_Plate_Plate01_C12.seq"
## [15] "1134071/DKO12F2R11_M13R_Plate_Plate01_D12.seq"
## [16] "1134071/DKO12F2R12_M13R_Plate_Plate01_E12.seq"
## [17] "1134071/DKO12F2R13_M13R_Plate_Plate01_F12.seq"
## [18] "1134071/DKO12F2R2_M13R_Plate_Plate01_C11.seq" 
## [19] "1134071/DKO12F2R3_M13R_Plate_Plate01_D11.seq" 
## [20] "1134071/DKO12F2R4_M13R_Plate_Plate01_E11.seq" 
##  [ reached getOption("max.print") -- omitted 29 entries ]
## extract sample names
snames.2F2R <- sub('.*/(.*)2F2R.*_(.*).seq', "\\1.\\2", seq.fns.2F2R)
snames.2F2R
##  [1] "Aza.B08"  "Aza.C09"  "Aza.D09"  "Aza.E09"  "Aza.C08"  "Aza.D08" 
##  [7] "Aza.E08"  "Aza.F08"  "Aza.G08"  "Aza.H08"  "Aza.A09"  "Aza.B09" 
## [13] "DKO1.B11" "DKO1.C12" "DKO1.D12" "DKO1.E12" "DKO1.F12" "DKO1.C11"
## [19] "DKO1.D11" "DKO1.E11"
##  [ reached getOption("max.print") -- omitted 29 entries ]
## do the alignment
ca.2F2R <- clone.aln(seq.fns.2F2R, "chr8", 11664775, 11667135, snames = snames.2F2R)
## the $m slot is the base matrix
ca.2F2R$m[1:4,1:10]
##           [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## reference "T"  "C"  "T"  "G"  "C"  "T"  "T"  "G"  "A"  "G"  
## Aza.B08   "T"  "T"  "T"  "G"  "T"  "T"  "T"  "G"  "A"  "G"  
##  [ reached getOption("max.print") -- omitted 2 rows ]
## now plot the cytosines in HCG context, clones are sorted by number of cytosine retension
plotcytosine(ca.2F2R, 'hcg')

## equivalent to using the base matrix of cytosines
m.hcg <- order.cytosine(ca.2F2R, "hcg", subset.c = T)
plotcytosine(m.hcg, 'hcg')

## one can do some matrix splicing to get rid of certain column (here the first and last column)
m.hcg.2 <- m.hcg[,-c(1,ncol(m.hcg))]
plotcytosine(m.hcg.2, 'hcg')

## or get rid of certain clone
m.hcg.3 <- m.hcg.2[-grep('Aza', rownames(m.hcg.2)),]
plotcytosine(m.hcg.3, 'hcg')

## to show the sample name and confirm they are indeed deleted
## before
plotcytosine(m.hcg.2, 'hcg', show.sname = T, mar.left=0.1)

## after
plotcytosine(m.hcg.3, 'hcg', show.sname = T, mar.left=0.1)

## to show GCH
m.gch <- order.cytosine(ca.2F2R, 'gch', subset.c = T)
plotcytosine(m.gch, 'gch', show.sname = T, mar.left=0.1)

## matching the order of GCH with HCG
m.gch.2 <- m.gch[rownames(m.hcg.3),]
plotcytosine(m.gch.2, 'gch', show.sname = T, mar.left=0.1)

## column names are the cytosine coordinates from the original basematrix
## we can use that to have both GCH and HCG focus on the same region
gch.locs <- as.numeric(colnames(m.gch.2))
plotcytosine(m.gch.2, 'gch', show.sname = T, mar.left=0.1, radius=5)

plotcytosine(m.hcg.3, 'hcg', add=T, loc.beg=min(gch.locs), loc.end=max(gch.locs), radius=2)

## combined with Wheatmap, one can juxtapose the two plots in many ways
load_all('~/tools/wheatmap/wheatmap/')
## Loading wheatmap
grid.newpage()
WGrob(plotcytosine(m.gch.2, 'gch', show.sname = T, mar.left=0.1, radius=5, draw = F)) +
  WGrob(plotcytosine(m.hcg.3, 'hcg', show.sname=T, mar.left=0.1, loc.beg=min(gch.locs), loc.end=max(gch.locs), radius=5, draw = F), Beneath())

WGrob(plotcytosine(m.gch.2, 'gch', show.sname = T, mar.left=0.15, radius=5, draw = F)) +
  WGrob(plotcytosine(m.hcg.3, 'hcg', show.sname=T, mar.left=0.1, loc.beg=min(gch.locs), loc.end=max(gch.locs), radius=5, draw = F), RightOf(pad=0.05))

show alignment

format.m(ca.2F2R$m[1:10,1:100])
##            reference TCTGCTTGAGTCTATGGAGGAAAAACTCCGCGGGGTCCGCGATTCCCATGGCCGCAGCCGCCTGCGGCACCAAGGCCATG
##              Aza.B08 TTTGTTTGAGTTTATGGAGGAAAAATTTTGTGGGGTTTGTGATTTTTATGGTTGTAGTTGTTTGTGGTATTAAGGTTATG
##              Aza.D09 TTTGTTTGAGTTTATGGAGGAAAAATTTTGTGGGGTTTGTGATTTTTATGGTCGTAGTCGTTTGCGGTATTAAGGCTATG
##              Aza.E09 TTTGTTTGAGTTTATGGAGGAAAAATTTTGCGGGGTTTGCGATTTTTATGGTTGTAGTTGTTTGTGGTATTAAGGTTATG
##              Aza.C08 TTTGTTTGAGTTTATGGAGGAAAAATTTTGTGGGGTTTGTGATTTTTATGGTTGTAGTTGTTTGTGGTATTAAGGTTATG
##              Aza.D08 -TTGTTTGAGTTTATGGAGGAAAAATTTTGCGGGGTTTGCGATTTTTATGGCCGCAGCCGCTTGCGGCATTAAGGCTATG
##              Aza.F08 TTTGTTTGAGTTTATGGAGGAAAAATTTTGTGGGGTTTGTGATTTTTATGGTCGTAGTTGTTTGTGGTATTAAGGTTATG
##              Aza.G08 TTTGTTTGAGTTTATGGAGGAAAAATTTTGTGGGGTTCGTGATTTTTATGGTCGTGGTCGTTTGTGGCATTAAGGTTATG
##              Aza.H08 TTTGTTTGAGTTTATGGAGGAAAAATTTCGCGGGGTTCGCGATTTTTATGGTCGTAGTCGTTTGCGGTATTAAGGTTATG
##              Aza.A09 TTTGTTTGAGTTTATGGAGGAAAAATTTCGCGGGGTTCGCGATTTTTATGGCCGCAGTCGTTTGCGGCATTAAGGCTATG
## 
##            reference GCCCTCTTCAAGCGCACCTT
##              Aza.B08 GTTTTTTTTAAGTGTATTTT
##              Aza.D09 GCTTTTTTTAAGCGTATTTT
##              Aza.E09 GTTTTTTTTAAGTATATTTT
##              Aza.C08 GTTTTTTTTAAGCGTATTTT
##              Aza.D08 GCTCTTTTTAAGCGCATTTT
##              Aza.F08 GTTTTTTTTAAGCGTATTTT
##              Aza.G08 GCTTTTTTTAAGCGCATTTT
##              Aza.H08 GTTTTTTTTAAGCGTATTTT
##              Aza.A09 GTTTTTTTTAAGCGTATTTT